########################################################
## PROGRAM NAME: 003_process_00t80.R                  ##
## AUTHOR: MATT MLECZKO                               ##
## DATE CREATED: 04/26/2021                           ##
## INPUTS:                                            ##
##    001_2020_Census_del.Rda                         ##
##    001_1980_trts.Rda                               ##       
##    001_ltdb_80t10.Rda                              ##
##    001_1990_trts.Rda                               ##
##    001_ltdb_90t10.Rda                              ## 
##    002_af_wide_final.Rda                           ##
##                                                    ##
##                                                    ##
## OUTPUTS:                                           ##
##    003_1980_trts.Rda                               ##
##    003_1990_trts.Rda                               ##
##    003_8090_trts.Rda                               ##
##                                                    ## 
## PURPOSE: Create parallel analytic file             ##
##          for 1980 and 1990 (race/ethnicity vars);  ##
##          produce figure 3                          ##
##                                                    ##
## LIST OF UPDATES:                                   ##
########################################################

log <- file("003_process_80t00.txt") 
sink(log, append=TRUE)
sink(log, append=TRUE, type="message")

## load libraries ##

library(tidycensus)
library(tigris)
library(tidyverse)
library(foreign)
library(stringr)
library(tm)
library(gdata)
library(readxl)
library(sf)
library(haven)
library(data.table)
library(ggridges)

## define paths ##
data_path <- "PATH TO DATA HERE"
progs <- "PATH TO PROGRAMS HERE"

## set working directory ##
setwd(data_path)

##
## load necessary functions ## 
##

`%notin%` <- Negate(`%in%`)
source(paste0(progs,"stata_merge.R"))

## load delineation file ##
load("001_2020_Census_del.Rda")

## MSA-FIPS crosswalk ##
fips.to.msas <- census.del.2020.final %>%
  filter(`Metropolitan/Micropolitan Statistical Area` == "Metropolitan Statistical Area") %>%
  select(FIPS,
         `CBSA Code`) %>%
  rename(CBSA = `CBSA Code`)

length(unique(fips.to.msas$CBSA)) == 392

keep.msas <- fips.to.msas %>%
  group_by(CBSA) %>%
  summarize(n=n()) %>%
  select(CBSA)

cbsa.names <- census.del.2020.final %>%
  rename(CBSA = `CBSA Code`,
         CBSA_name = `CBSA Title`) %>%
  select(CBSA,
         CBSA_name) %>%
  distinct()

###########################
## process the 1980 data ##
###########################

##
## convert 1980 tract boundaries to 2010 boundaries ##
##

load("001_1980_trts.Rda")
load("001_ltdb_80t10.Rda")

## pre-merge checks ##
paste("CLASS OF GEOID IN 1980 TRACT FILE")
class(trt1980.init$GEOID)
paste("GEOID CHECK IN 1980 TRACT FILE")
range(nchar(trim(trt1980.init$GEOID)))
paste("IS 1980 TRACT FILE UNIQUE BY GEOID?")
nrow(trt1980.init) == length(unique(trt1980.init$GEOID))

paste("CLASS OF GEOID IN LTDB 80t10")
class(ltdb.80t10$GEOID)
paste("GEOID80 CHECK IN LTDB 80T10")
range(nchar(trim(ltdb.80t10$trtid80)))
paste("GEOID10 CHECK IN LTDB 80T10")
range(nchar(trim(ltdb.80t10$trtid10)))
paste("GEOID CLONE CHECK IN LTDB 80T10")
range(nchar(trim(ltdb.80t10$GEOID)))

## merge the files ## 
trt1980.up.m <- stata.merge(trt1980.init,
                            ltdb.80t10,
                            "GEOID")

## diagnose merge ## 
paste("MERGE BETWEEN 1980 TRACT FILE AND LTDB 80T10")
paste("THERE ARE 6 INVALID OBS IN THE LTDB 80T10")
table(trt1980.up.m$merge.variable)

## keep only the matches and obs with non-zero weights ##
trt1980.match <- trt1980.up.m %>%
  filter(merge.variable == 3 &
         weight != 0) %>%
  select(-merge.variable)

trt1980.nowt <- trt1980.up.m %>%
  filter(merge.variable == 3 & weight == 0)

trt1980.pr1 <- trt1980.match %>%
  select(GEOID,
         trtid80,
         trtid10, 
         everything()) %>%
  mutate_at(vars(pop_total:gq_num),
            .funs = funs(. * weight))

trt1980.pr2 <- trt1980.pr1 %>% 
  group_by(trtid10) %>%
  summarize_at(vars(pop_total:gq_num),
               funs(sum(., na.rm=T))) %>%
  rename(GEOID = trtid10)

## 
## process ##
##

trt1980.fin <- trt1980.pr2 %>%
  mutate(gq_per = ifelse(gq_den > 0, gq_num/gq_den, 0), 
         gq_per_alt = ifelse(pop_total > 0, gq_num/pop_total, 0), 
         per_white = ifelse(pop_total > 0, pop_nh_white/pop_total, 0),
         per_black = ifelse(pop_total > 0, pop_nh_black/pop_total, 0),
         per_asianpl = ifelse(pop_total > 0, pop_nh_asianplus/pop_total, 0),
         per_hl = ifelse(pop_total > 0, pop_hl/pop_total, 0),
         per_other = ifelse(pop_total > 0, pop_nh_other/pop_total, 0),
         log_asianp = case_when(pop_nh_asianplus != 0 ~ log(1/per_asianpl),
                                pop_nh_asianplus == 0 ~ 0),
         log_black = case_when(pop_nh_black != 0 ~ log(1/per_black),
                               pop_nh_black == 0 ~ 0),
         log_hl = case_when(pop_hl != 0 ~ log(1/per_hl),
                            pop_hl == 0 ~ 0),
         log_white = case_when(pop_nh_white != 0 ~ log(1/per_white),
                               pop_nh_white == 0 ~ 0),
         log_other = case_when(pop_nh_other != 0 ~ log(1/per_other),
                               pop_nh_other == 0 ~ 0),
         trt_entropy_er = per_asianpl*log_asianp + 
                          per_black*log_black + 
                          per_hl*log_hl + 
                          per_white*log_white +
                          per_other*log_other,
         trt_entropy_ers = (trt_entropy_er - min(trt_entropy_er))/(max(trt_entropy_er)-min(trt_entropy_er)),
         hd_er = ifelse(trt_entropy_ers >= 0.7414 &
                        per_asianpl < 0.45 & 
                        per_black < 0.45 & 
                        per_hl < 0.45 & 
                        per_white < 0.45 & 
                        per_other < 0.45, 1, 0),
         ld_er = ifelse(trt_entropy_ers <= 0.3707 |
                        per_asianpl > 0.8 |
                        per_black > 0.8 |
                        per_hl > 0.8 |
                        per_white > 0.8 |
                        per_other > 0.8, 1, 0),
         md_er = ifelse(hd_er == 0 & 
                        ld_er == 0, 1, 0)) %>%
  filter(pop_total >= 500 & 
         hhs > 10 & 
         gq_per <= 0.5) %>%
  rename_at(vars(-GEOID),function(x) paste0(x,"_1980"))

##
## determine majority racial group ##
##

trt1980.fin$maj_race_1980 <- c(colnames(trt1980.fin[,c("per_asianpl_1980",
                                                       "per_black_1980",
                                                       "per_hl_1980",
                                                       "per_white_1980",
                                                       "per_other_1980")])[apply(trt1980.fin[,c("per_asianpl_1980",
                                                                                                "per_black_1980",
                                                                                                "per_hl_1980",
                                                                                                "per_white_1980",
                                                                                                "per_other_1980")],
                                                                                                1,which.max)])


###########################
## process the 1990 data ##
###########################

##
## convert 1990 tract boundaries to 2010 boundaries ##
##

load("001_1990_trts.Rda")
load("001_ltdb_90t10.Rda")

## pre-merge checks ##
paste("CLASS OF GEOID IN 1990 TRACT FILE")
class(trt1990.init$GEOID)
paste("GEOID CHECK IN 1990 TRACT FILE")
range(nchar(trim(trt1990.init$GEOID)))
paste("IS 1990 TRACT FILE UNIQUE BY GEOID?")
nrow(trt1990.init) == length(unique(trt1990.init$GEOID))

paste("CLASS OF GEOID IN LTDB 90T10")
class(ltdb.90t10$GEOID)
paste("GEOID90 CHECK IN LTDB 90T10")
range(nchar(trim(ltdb.90t10$trtid90)))
paste("GEOID10 CHECK IN LTDB 90T10")
range(nchar(trim(ltdb.90t10$trtid10)))
paste("GEOID90 CLONE CHECK IN LTDB 90T10")
range(nchar(trim(ltdb.90t10$GEOID)))

## merge the files ## 
trt1990.up.m <- stata.merge(trt1990.init,
                            ltdb.90t10,
                            "GEOID")

## diagnose merge ## 
paste("MERGE BETWEEN 1990 TRACT FILE AND LTDB 90T10")
table(trt1990.up.m$merge.variable)

## keep only the matches and obs with non-zero weights ##
trt1990.match <- trt1990.up.m %>%
  filter(merge.variable == 3 &
           weight != 0) %>%
  select(-merge.variable)

trt1990.nowt <- trt1990.up.m %>%
  filter(merge.variable == 3 & weight == 0)

trt1990.pr1 <- trt1990.match %>%
  select(GEOID,
         trtid90,
         trtid10, 
         everything()) %>%
  mutate_at(vars(pop_total:gq_den),
            .funs = funs(. * weight))

trt1990.pr2 <- trt1990.pr1 %>% 
  group_by(trtid10) %>%
  summarize_at(vars(pop_total:gq_den),
               funs(sum(., na.rm=T))) %>%
  rename(GEOID = trtid10)

## 
## process ##
##

trt1990.fin <- trt1990.pr2 %>%
  mutate(gq_per = ifelse(gq_den > 0, gq_num/gq_den, 0), 
         gq_per_alt = ifelse(pop_total > 0, gq_num/pop_total, 0), 
         per_white = ifelse(pop_total > 0, pop_nh_white/pop_total, 0),
         per_black = ifelse(pop_total > 0, pop_nh_black/pop_total, 0),
         per_aian = ifelse(pop_total > 0, pop_nh_aian/pop_total, 0),
         per_asianpi = ifelse(pop_total > 0, pop_nh_asianpi/pop_total, 0),
         per_hl = ifelse(pop_total > 0, pop_hl/pop_total, 0),
         per_other = ifelse(pop_total > 0, pop_nh_other/pop_total, 0),
         log_asianpi = case_when(pop_nh_asianpi != 0 ~ log(1/per_asianpi),
                                 pop_nh_asianpi == 0 ~ 0),
         log_black = case_when(pop_nh_black != 0 ~ log(1/per_black),
                               pop_nh_black == 0 ~ 0),
         log_hl = case_when(pop_hl != 0 ~ log(1/per_hl),
                            pop_hl == 0 ~ 0),
         log_white = case_when(pop_nh_white != 0 ~ log(1/per_white),
                               pop_nh_white == 0 ~ 0),
         log_aian = case_when(pop_nh_aian != 0 ~ log(1/per_aian),
                              pop_nh_aian == 0 ~ 0),
         log_other = case_when(pop_nh_other != 0 ~ log(1/per_other),
                               pop_nh_other == 0 ~ 0),
         trt_entropy_er = per_asianpi*log_asianpi + 
                          per_black*log_black + 
                          per_hl*log_hl + 
                          per_white*log_white +
                          per_aian*log_aian +
                          per_other*log_other,
         trt_entropy_ers = (trt_entropy_er - min(trt_entropy_er))/(max(trt_entropy_er)-min(trt_entropy_er)),
         hd_er = ifelse(trt_entropy_ers >= 0.7414 &
                        per_asianpi < 0.45 & 
                        per_black < 0.45 & 
                        per_hl < 0.45 & 
                        per_white < 0.45 & 
                        per_aian < 0.45 &
                        per_other < 0.45, 1, 0),
         ld_er = ifelse(trt_entropy_ers <= 0.3707 |
                        per_asianpi > 0.8 |
                        per_black > 0.8 |
                        per_hl > 0.8 |
                        per_white > 0.8 |
                        per_aian > 0.8 |
                        per_other > 0.8, 1, 0),
         md_er = ifelse(hd_er == 0 & 
                        ld_er == 0, 1, 0)) %>%
  filter(pop_total >= 500 & 
         hhs > 10 & 
         gq_per <= 0.5) %>%
  rename_at(vars(-GEOID),function(x) paste0(x,"_1990"))

##
## determine majority/plurality ethnoracial group ##
##

trt1990.fin$maj_race_1990 <- c(colnames(trt1990.fin[,c("per_asianpi_1990",
                                                       "per_aian_1990",
                                                       "per_black_1990",
                                                       "per_hl_1990",
                                                       "per_white_1990",
                                                       "per_other_1990")])[apply(trt1990.fin[,c("per_asianpi_1990",
                                                                                                "per_aian_1990",
                                                                                                "per_black_1990",
                                                                                                "per_hl_1990",
                                                                                                "per_white_1990",
                                                                                                "per_other_1990")],
                                                                            1,which.max)])

##
## merge 1980 and 1990 ## 
##

trtid8090.m <- stata.merge(trt1980.fin,
                           trt1990.fin,
                           "GEOID")

## check merge ##
paste("MERGE BETWEEN 1980 AND 1990 FILES")
table(trtid8090.m$merge.variable, useNA = "ifany")

## keep matches ##
trtid8090 <- trtid8090.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

## check vals ##
paste("1980 SCALED ER ENTROPY")
summary(trtid8090$trt_entropy_ers_1980)
paste("1990 SCALED ER ENTROPY")
summary(trtid8090$trt_entropy_ers_1990)

paste("1980 LD ER BREAKDOWN")
prop.table(table(trtid8090$ld_er_1980, useNA = "ifany"))
paste("1980 MD ER BREAKDOWN")
prop.table(table(trtid8090$md_er_1980, useNA = "ifany"))
paste("1980 HD ER BREAKDOWN")
prop.table(table(trtid8090$hd_er_1980, useNA = "ifany"))

paste("1990 LD ER BREAKDOWN")
prop.table(table(trtid8090$ld_er_1990, useNA = "ifany"))
paste("1990 MD ER BREAKDOWN")
prop.table(table(trtid8090$md_er_1990, useNA = "ifany"))
paste("1990 HD ER BREAKDOWN")
prop.table(table(trtid8090$hd_er_1990, useNA = "ifany"))


## bring on 2000 - 2020 data ##

load("002_af_wide_final.Rda")

paste("2000 - 2020 ER ENTROPY")
summary(af.wide.final$trt_entropy_er_2000)
summary(af.wide.final$trt_entropy_er_2010)
summary(af.wide.final$trt_entropy_er_2020)

paste("2000 - 2020 SCALED ER ENTROPY")
summary(af.wide.final$trt_entropy_ers_2000)
summary(af.wide.final$trt_entropy_ers_2010)
summary(af.wide.final$trt_entropy_ers_2020)

paste("2000 LD-MD-HD BREAKDOWN")
prop.table(table(af.wide.final$ld_er_2000, useNA = "ifany"))
prop.table(table(af.wide.final$md_er_2000, useNA = "ifany"))
prop.table(table(af.wide.final$hd_er_2000, useNA = "ifany"))

paste("2010 LD-MD-HD BREAKDOWN")
prop.table(table(af.wide.final$ld_er_2010, useNA = "ifany"))
prop.table(table(af.wide.final$md_er_2010, useNA = "ifany"))
prop.table(table(af.wide.final$hd_er_2010, useNA = "ifany"))

paste("2020 LD-MD-HD BREAKDOWN")
prop.table(table(af.wide.final$ld_er_2020, useNA = "ifany"))
prop.table(table(af.wide.final$md_er_2020, useNA = "ifany"))
prop.table(table(af.wide.final$hd_er_2020, useNA = "ifany"))


## combine and observe trt trajectories ## 

af.fm <- af.wide.final %>%
  select(GEOID,
         starts_with("trt_entropy"),
         starts_with("ld_er"),
         starts_with("md_er"),
         starts_with("hd_er"),
         starts_with("int_"))

all.decades.m <- stata.merge(trtid8090,
                             af.fm,
                             "GEOID")

## check merge ##
paste("MERGE BETWEEN 8090 FILE AND ANALYTIC FILE")
table(all.decades.m$merge.variable, useNA = "ifany")

## keep matches ##
all.decades <- all.decades.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  mutate(ld_all = ifelse(ld_er_1980 == 1 & ld_er_1990 == 1 & 
                         ld_er_2000 == 1 & ld_er_2010 == 1 & ld_er_2020 == 1, 1, 0),
         md_all = ifelse(md_er_1980 == 1 & md_er_1990 == 1 & 
                         md_er_2000 == 1 & md_er_2010 == 1 & md_er_2020 == 1, 1, 0),
         hd_all = ifelse(hd_er_1980 == 1 & hd_er_1990 == 1 & 
                         hd_er_2000 == 1 & hd_er_2010 == 1 & hd_er_2020 == 1, 1, 0))

paste("BREAKDOWN OF LD ACROSS ALL DECADES")
table(all.decades$ld_all)
paste("BREAKDOWN OF MD ACROSS ALL DECADES")
table(all.decades$md_all)
paste("BREAKDOWN OF HD ACROSS ALL DECADES")
table(all.decades$hd_all)

md <- all.decades %>%
  filter(md_all == 1)

hd <- all.decades %>%
  filter(hd_all == 1)

## 
## prep vars for graph
##

years <- c(rep("1980" , 3) , rep("1990" , 3) , rep("2000" , 3), rep("2010" , 3), rep("2020" , 3))
gr <- rep(c("Low diversity" , "Moderate diversity" , "High diversity") , 5)
value <- c(table(all.decades$ld_er_1980)[2],
           table(all.decades$md_er_1980)[2],
           table(all.decades$hd_er_1980)[2],
           table(all.decades$ld_er_1990)[2],
           table(all.decades$md_er_1990)[2],
           table(all.decades$hd_er_1990)[2],
           table(all.decades$ld_er_2000)[2],
           table(all.decades$md_er_2000)[2],
           table(all.decades$hd_er_2000)[2],
           table(all.decades$ld_er_2010)[2],
           table(all.decades$md_er_2010)[2],
           table(all.decades$hd_er_2010)[2],
           table(all.decades$ld_er_2020)[2],
           table(all.decades$md_er_2020)[2],
           table(all.decades$hd_er_2020)[2])

plot.data <- data.frame(years,
                        gr,
                        value)

##
## plot the data ##
## this is figure 3 ##
##

ggplot(plot.data, aes(fill=gr, y=value, x=years)) + 
  geom_bar(position="fill", stat="identity") +
  xlab("") + 
  ylab("Share") + 
  scale_fill_discrete(name = "") + 
  theme_classic()


#######################################
## create MSA vars by aggregating up ##
#######################################

## now create MSA level data ##
## doing this so that metro pop numbers ##
## are consistent throughout analyses   ##

## 1980 ## 

## start with unweighted vars ##
trt1980.wcbsa <- trt1980.fin %>%
  filter(GEOID %in% all.decades$GEOID) %>%
  mutate(FIPS = substr(GEOID,1,5)) %>%
  left_join(select(census.del.2020.final, `CBSA Code`, FIPS), "FIPS") %>%
  rename(CBSA = `CBSA Code`) 
  
agg.msa.1980 <- trt1980.wcbsa %>%
  group_by(CBSA) %>%
  summarize_at(vars(pop_total_1980:gq_den_1980), sum, na.rm=T) %>%
  mutate(per_asianplus_msa_1980 = case_when(pop_total_1980 != 0 ~ pop_nh_asianplus_1980/pop_total_1980,
                                            pop_total_1980 == 0 ~ 0),
         per_black_msa_1980 = case_when(pop_total_1980 != 0 ~ pop_nh_black_1980/pop_total_1980,
                                        pop_total_1980 == 0 ~ 0),
         per_hl_msa_1980 = case_when(pop_total_1980 != 0 ~ pop_hl_1980/pop_total_1980,
                                         pop_total_1980 == 0 ~ 0),
         per_white_msa_1980 = case_when(pop_total_1980 != 0 ~ pop_nh_white_1980/pop_total_1980,
                                        pop_total_1980 == 0 ~ 0),
         per_other_msa_1980 = case_when(pop_total_1980 != 0 ~ pop_nh_other_1980/pop_total_1980, 
                                        pop_total_1980 == 0 ~ 0),
         log_asianplus_msa_1980 = case_when(pop_nh_asianplus_1980 != 0 ~ log(1/per_asianplus_msa_1980),
                                            pop_nh_asianplus_1980 == 0 ~ 0),
         log_black_msa_1980 = case_when(pop_nh_black_1980 != 0 ~ log(1/per_black_msa_1980),
                                        pop_nh_black_1980 == 0 ~ 0),
         log_hl_msa_1980 = case_when(pop_hl_1980 != 0 ~ log(1/per_hl_msa_1980),
                                         pop_hl_1980 == 0 ~ 0),
         log_white_msa_1980 = case_when(pop_nh_white_1980 != 0 ~ log(1/per_white_msa_1980),
                                        pop_nh_white_1980 == 0 ~ 0),
         log_other_msa_1980 = case_when(pop_nh_other_1980 != 0 ~ log(1/per_other_msa_1980),
                                        pop_nh_other_1980 == 0 ~ 0),
         msa_entropy_er_1980 = per_asianplus_msa_1980*log_asianplus_msa_1980 + 
                               per_black_msa_1980*log_black_msa_1980 + 
                               per_hl_msa_1980*log_hl_msa_1980 + 
                               per_white_msa_1980*log_white_msa_1980 +
                               per_other_msa_1980*log_other_msa_1980,
         msa_entropy_ers_1980 = (msa_entropy_er_1980 - min(msa_entropy_er_1980))/(max(msa_entropy_er_1980)-min(msa_entropy_er_1980))) %>%
  rename(pop_total_msa_1980 = pop_total_1980) %>%
  select(CBSA,
         pop_total_msa_1980,
         per_asianplus_msa_1980,
         per_black_msa_1980,
         per_hl_msa_1980,
         per_white_msa_1980,
         per_other_msa_1980,
         msa_entropy_er_1980,
         msa_entropy_ers_1980)

## now, merge this info back onto tracts ## 

metro.tracts.1980.wmsa.m <- stata.merge(trt1980.wcbsa,
                                        agg.msa.1980,
                                        "CBSA")

## check merge ## 
paste("MERGE BETWEEN 1980 TRACT FILE AND 1980 AGGREGATED MSA FILE")
table(metro.tracts.1980.wmsa.m$merge.variable, useNA = "ifany")

## keep matches ##
metro.tracts.1980.temp <- metro.tracts.1980.wmsa.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  mutate(D_pre_comp_asianpl_1980 = case_when(per_asianpl_1980 !=0 &  per_asianplus_msa_1980 != 0 ~ log(per_asianpl_1980/per_asianplus_msa_1980),
                                      per_asianpl_1980 == 0 | per_asianplus_msa_1980 == 0 ~ 0),
         D_pre_comp_black_1980 = case_when(per_black_1980 !=0 & per_black_msa_1980 != 0 ~ log(per_black_1980/per_black_msa_1980),
                                      per_black_1980 == 0 | per_black_msa_1980 == 0 ~ 0),
         D_pre_comp_hl_1980 = case_when(per_hl_1980 !=0 &  per_hl_msa_1980 != 0 ~ log(per_hl_1980/per_hl_msa_1980),
                                       per_hl_1980 == 0 | per_hl_msa_1980 == 0 ~ 0),
         D_pre_comp_other_1980 = case_when(per_other_1980 !=0 & per_other_msa_1980 != 0 ~ log(per_other_1980/per_other_msa_1980),
                                      per_other_1980 == 0 | per_other_msa_1980 == 0 ~ 0),
         D_pre_comp_white_1980 = case_when(per_white_1980 !=0 &  per_white_msa_1980 != 0 ~ log(per_white_1980/per_white_msa_1980),
                                      per_white_1980 == 0 | per_white_msa_1980 == 0 ~ 0),
         D_comp_asianplus_1980 = case_when(per_asianpl_1980 !=0 &  per_asianplus_msa_1980 != 0 ~ per_asianpl_1980 * log(per_asianpl_1980/per_asianplus_msa_1980),
                                           per_asianpl_1980 == 0 | per_asianplus_msa_1980 == 0 ~ 0),
         D_comp_black_1980 = case_when(per_black_1980 !=0 & per_black_msa_1980 != 0 ~ per_black_1980 * log(per_black_1980/per_black_msa_1980),
                                       per_black_1980 == 0 | per_black_msa_1980 == 0 ~ 0),
         D_comp_hl_1980 = case_when(per_hl_1980 !=0 &  per_hl_msa_1980 != 0 ~ per_hl_1980 * log(per_hl_1980/per_hl_msa_1980),
                                   per_hl_1980 == 0 | per_hl_msa_1980 == 0 ~ 0),
         D_comp_other_1980 = case_when(per_other_1980 !=0 & per_other_msa_1980 != 0 ~ per_other_1980 * log(per_other_1980/per_other_msa_1980),
                                       per_other_1980 == 0 | per_other_msa_1980 == 0 ~ 0),
         D_comp_white_1980 = case_when(per_white_1980 !=0 &  per_white_msa_1980 != 0 ~ per_white_1980 * log(per_white_1980/per_white_msa_1980),
                                       per_white_1980 == 0 | per_white_msa_1980 == 0 ~ 0),
         D_comp_all_1980 = rowSums(cbind(D_comp_asianplus_1980,D_comp_black_1980,D_comp_hl_1980,D_comp_other_1980,D_comp_white_1980),na.rm=T),
         D_comp_final_1980 = (pop_total_1980/pop_total_msa_1980) * D_comp_all_1980) %>%
  group_by(CBSA) %>%
  mutate(tracts_calc_1980 = n(),
         msa_D_1980 = sum(D_comp_final_1980,na.rm=T)) %>%
  ungroup()

metro.tracts.1980.temp$D_int_1980 <- ifelse(metro.tracts.1980.temp$D_comp_all_1980 < median(metro.tracts.1980.temp$D_comp_all_1980,na.rm=T) + 
                                            sd(metro.tracts.1980.temp$D_comp_all_1980,na.rm=T), 1, 0) 

##
## further processing
##

metro.tracts.1980.fin <- metro.tracts.1980.temp %>%
  mutate(int_er_abs_1980 = ifelse(md_er_1980 == 1 | hd_er_1980 == 1, 1, 0),
         int_er_rel_1980 = ifelse((md_er_1980 == 1 | hd_er_1980 == 1) & D_int_1980 == 1, 1, 0),
         int_er_relf_1980 = ifelse((md_er_1980 == 1 | hd_er_1980 == 1) & D_int_1980== 1 & rowSums(cbind(per_hl_1980,per_black_1980),na.rm=T) >=0.2 , 1, 0))

##
## 1990 ## 
##

## start with unweighted vars ##
trt1990.wcbsa <- trt1990.fin %>%
  filter(GEOID %in% all.decades$GEOID) %>%
  mutate(FIPS = substr(GEOID,1,5)) %>%
  left_join(select(census.del.2020.final, `CBSA Code`, FIPS), "FIPS") %>%
  rename(CBSA = `CBSA Code`) 

agg.msa.1990 <- trt1990.wcbsa %>%
  group_by(CBSA) %>%
  summarize_at(vars(pop_total_1990:gq_den_1990), sum, na.rm=T) %>%
  mutate(per_aian_msa_1990 = case_when(pop_total_1990 != 0 ~ pop_nh_aian_1990/pop_total_1990,
                                       pop_total_1990 == 0 ~ 0), 
         per_asianpi_msa_1990 = case_when(pop_total_1990 != 0 ~ pop_nh_asianpi_1990/pop_total_1990,
                                          pop_total_1990 == 0 ~ 0),
         per_black_msa_1990 = case_when(pop_total_1990 != 0 ~ pop_nh_black_1990/pop_total_1990,
                                        pop_total_1990 == 0 ~ 0),
         per_hl_msa_1990 = case_when(pop_total_1990 != 0 ~ pop_hl_1990/pop_total_1990,
                                         pop_total_1990 == 0 ~ 0),
         per_white_msa_1990 = case_when(pop_total_1990 != 0 ~ pop_nh_white_1990/pop_total_1990,
                                        pop_total_1990 == 0 ~ 0),
         per_other_msa_1990 = case_when(pop_total_1990 != 0 ~ pop_nh_other_1990/pop_total_1990, 
                                        pop_total_1990 == 0 ~ 0),
         log_aian_msa_1990 = case_when(pop_nh_aian_1990 != 0 ~ log(1/per_aian_msa_1990),
                                       pop_nh_aian_1990 == 0 ~ 0),
         log_asianpi_msa_1990 = case_when(pop_nh_asianpi_1990 != 0 ~ log(1/per_asianpi_msa_1990),
                                          pop_nh_asianpi_1990 == 0 ~ 0),
         log_black_msa_1990 = case_when(pop_nh_black_1990 != 0 ~ log(1/per_black_msa_1990),
                                        pop_nh_black_1990 == 0 ~ 0),
         log_hl_msa_1990 = case_when(pop_hl_1990 != 0 ~ log(1/per_hl_msa_1990),
                                         pop_hl_1990 == 0 ~ 0),
         log_white_msa_1990 = case_when(pop_nh_white_1990 != 0 ~ log(1/per_white_msa_1990),
                                        pop_nh_white_1990 == 0 ~ 0),
         log_other_msa_1990 = case_when(pop_nh_other_1990 != 0 ~ log(1/per_other_msa_1990),
                                        pop_nh_other_1990 == 0 ~ 0),
         msa_entropy_er_1990 = per_aian_msa_1990 * log_aian_msa_1990 + 
                               per_asianpi_msa_1990*log_asianpi_msa_1990 + 
                               per_black_msa_1990*log_black_msa_1990 + 
                               per_hl_msa_1990*log_hl_msa_1990 + 
                               per_white_msa_1990*log_white_msa_1990 +
                               per_other_msa_1990*log_other_msa_1990,
         msa_entropy_ers_1990 = (msa_entropy_er_1990 - min(msa_entropy_er_1990))/(max(msa_entropy_er_1990)-min(msa_entropy_er_1990))) %>%
  rename(pop_total_msa_1990 = pop_total_1990) %>%
  select(CBSA,
         pop_total_msa_1990,
         per_aian_msa_1990,
         per_asianpi_msa_1990,
         per_black_msa_1990,
         per_hl_msa_1990,
         per_white_msa_1990,
         per_other_msa_1990,
         msa_entropy_er_1990,
         msa_entropy_ers_1990)

## now, merge this info back onto tracts ## 
metro.tracts.1990.wmsa.m <- stata.merge(trt1990.wcbsa,
                                        agg.msa.1990,
                                        "CBSA")

## check merge ## 
paste("MERGE BETWEEN 1990 TRACT FILE AND 1990 AGGREGATED MSA FILE")
table(metro.tracts.1990.wmsa.m$merge.variable, useNA = "ifany")

## keep matches ##
metro.tracts.1990.temp <- metro.tracts.1990.wmsa.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  mutate(D_pre_comp_aian_1990 = case_when(per_aian_1990 !=0 & per_aian_msa_1990 != 0 ~ log(per_aian_1990/per_aian_msa_1990),
                                          per_aian_1990 == 0 | per_aian_msa_1990 == 0 ~ 0),
         D_pre_comp_asianpi_1990 = case_when(per_asianpi_1990 !=0 &  per_asianpi_msa_1990 != 0 ~ log(per_asianpi_1990/per_asianpi_msa_1990),
                                           per_asianpi_1990 == 0 | per_asianpi_msa_1990 == 0 ~ 0),
         D_pre_comp_black_1990 = case_when(per_black_1990 !=0 & per_black_msa_1990 != 0 ~ log(per_black_1990/per_black_msa_1990),
                                           per_black_1990 == 0 | per_black_msa_1990 == 0 ~ 0),
         D_pre_comp_hl_1990 = case_when(per_hl_1990 !=0 &  per_hl_msa_1990 != 0 ~ log(per_hl_1990/per_hl_msa_1990),
                                            per_hl_1990 == 0 | per_hl_msa_1990 == 0 ~ 0),
         D_pre_comp_other_1990 = case_when(per_other_1990 !=0 & per_other_msa_1990 != 0 ~ log(per_other_1990/per_other_msa_1990),
                                           per_other_1990 == 0 | per_other_msa_1990 == 0 ~ 0),
         D_pre_comp_white_1990 = case_when(per_white_1990 !=0 &  per_white_msa_1990 != 0 ~ log(per_white_1990/per_white_msa_1990),
                                           per_white_1990== 0 | per_white_msa_1990 == 0 ~ 0),
         D_comp_aian_1990 = case_when(per_aian_1990 !=0 &  per_aian_msa_1990 != 0 ~ per_aian_1990 * log(per_aian_1990/per_aian_msa_1990),
                                      per_aian_1990 == 0 | per_aian_msa_1990 == 0 ~ 0),
         D_comp_asianpi_1990 = case_when(per_asianpi_1990 !=0 &  per_asianpi_msa_1990 != 0 ~ per_asianpi_1990 * log(per_asianpi_1990/per_asianpi_msa_1990),
                                         per_asianpi_1990 == 0 | per_asianpi_msa_1990 == 0 ~ 0),
         D_comp_black_1990 = case_when(per_black_1990 !=0 & per_black_msa_1990 != 0 ~ per_black_1990 * log(per_black_1990/per_black_msa_1990),
                                       per_black_1990 == 0 | per_black_msa_1990 == 0 ~ 0),
         D_comp_hl_1990 = case_when(per_hl_1990 !=0 &  per_hl_msa_1990 != 0 ~ per_hl_1990 * log(per_hl_1990/per_hl_msa_1990),
                                        per_hl_1990 == 0 | per_hl_msa_1990 == 0 ~ 0),
         D_comp_other_1990 = case_when(per_other_1990 !=0 & per_other_msa_1990 != 0 ~ per_other_1990 * log(per_other_1990/per_other_msa_1990),
                                       per_other_1990 == 0 | per_other_msa_1990 == 0 ~ 0),
         D_comp_white_1990 = case_when(per_white_1990 !=0 &  per_white_msa_1990 != 0 ~ per_white_1990 * log(per_white_1990/per_white_msa_1990),
                                       per_white_1990 == 0 | per_white_msa_1990 == 0 ~ 0),
         D_comp_all_1990 = rowSums(cbind(D_comp_asianpi_1990,D_comp_black_1990,D_comp_hl_1990,D_comp_other_1990,D_comp_white_1990),na.rm=T),
         D_comp_final_1990 = (pop_total_1990/pop_total_msa_1990) * D_comp_all_1990) %>%
  group_by(CBSA) %>%
  mutate(tracts_calc_1990 = n(),
         msa_D_1990 = sum(D_comp_final_1990,na.rm=T)) %>%
  ungroup()

metro.tracts.1990.temp$D_int_1990 <- ifelse(metro.tracts.1990.temp$D_comp_all_1990 < median(metro.tracts.1990.temp$D_comp_all_1990,na.rm=T) + 
                                            sd(metro.tracts.1990.temp$D_comp_all_1990,na.rm=T), 1, 0) 

##
## further processing 
##

metro.tracts.1990.fin <- metro.tracts.1990.temp %>%
  mutate(int_er_abs_1990 = ifelse(md_er_1990 == 1 | hd_er_1990 == 1, 1, 0),
         int_er_rel_1990 = ifelse((md_er_1990 == 1 | hd_er_1990 == 1) & D_int_1990 == 1, 1, 0),
         int_er_relf_1990 = ifelse((md_er_1990 == 1 | hd_er_1990 == 1) & D_int_1990== 1 & rowSums(cbind(per_hl_1990,per_black_1990),na.rm=T) >=0.2 , 1, 0))

##
## now check the results ##
##

paste("1980 INT ER ABS, 1980 INT ER REL, 1980 INT ER RELF")
table(metro.tracts.1980.fin$int_er_abs_1980, useNA = "ifany")
table(metro.tracts.1980.fin$int_er_rel_1980, useNA = "ifany")
table(metro.tracts.1980.fin$int_er_relf_1980, useNA = "ifany")

paste("1990 INT ER ABS, 1980 INT ER REL, 1980 INT ER RELF")
table(metro.tracts.1990.fin$int_er_abs_1990, useNA = "ifany")
table(metro.tracts.1990.fin$int_er_rel_1990, useNA = "ifany")
table(metro.tracts.1990.fin$int_er_relf_1990, useNA = "ifany")

paste("1990 PROPS INT ER ABS, 1980 INT ER REL, 1980 INT ER RELF")
prop.table(table(metro.tracts.1990.fin$int_er_abs_1990, useNA = "ifany"))
prop.table(table(metro.tracts.1990.fin$int_er_rel_1990, useNA = "ifany"))
prop.table(table(metro.tracts.1990.fin$int_er_relf_1990, useNA = "ifany"))

paste("2000 INT ER ABS, 1980 INT ER REL, 1980 INT ER RELF")
table(all.decades$int_er_abs_2000, useNA = "ifany")
table(all.decades$int_er_rel_2000, useNA = "ifany")
table(all.decades$int_er_relf_2000, useNA = "ifany")

paste("2010 INT ER ABS, 1980 INT ER REL, 1980 INT ER RELF")
table(all.decades$int_er_abs_2010, useNA = "ifany")
table(all.decades$int_er_rel_2010, useNA = "ifany")
table(all.decades$int_er_relf_2010, useNA = "ifany")

paste("2020 INT ER ABS, 1980 INT ER REL, 1980 INT ER RELF")
table(all.decades$int_er_abs_2020, useNA = "ifany")
table(all.decades$int_er_rel_2020, useNA = "ifany")
table(all.decades$int_er_relf_2020, useNA = "ifany")

paste("PROP TABLE 1980 INT ER ABS, INT ER REL")
prop.table(table(metro.tracts.1980.fin$int_er_abs_1980, useNA = "ifany"))
prop.table(table(metro.tracts.1980.fin$int_er_rel_1980, useNA = "ifany"))

paste("PROP TABLE 1990 INT ER ABS, INT ER REL")
prop.table(table(metro.tracts.1990.fin$int_er_abs_1990, useNA = "ifany"))
prop.table(table(metro.tracts.1990.fin$int_er_rel_1990, useNA = "ifany"))

paste("PROP TABLE 2000 INT ER ABS, INT ER REL")
prop.table(table(all.decades$int_er_abs_2000, useNA = "ifany"))
prop.table(table(all.decades$int_er_rel_2000, useNA = "ifany"))

paste("PROP TABLE 2010 INT ER ABS, INT ER REL")
prop.table(table(all.decades$int_er_abs_2010, useNA = "ifany"))
prop.table(table(all.decades$int_er_rel_2010, useNA = "ifany"))

paste("PROP TABLE 2020 INT ER ABS, INT ER REL")
prop.table(table(all.decades$int_er_abs_2020, useNA = "ifany"))
prop.table(table(all.decades$int_er_rel_2020, useNA = "ifany"))

## combine 80 and 90 final files ##
metro.tracts.8090.fin.m <- stata.merge(metro.tracts.1980.fin,
                                       metro.tracts.1990.fin,
                                       "GEOID")

## check merge ## 
paste("MERGE BETWEEN 1980 and 1990 TRACT FILES WITH MSA VARS")
table(metro.tracts.8090.fin.m$merge.variable, useNA = "ifany")

## keep matches ##
metro.tracts.8090.fin <- metro.tracts.8090.fin.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

## save files ## 

save(metro.tracts.1980.fin,
     file = "003_1980_trts.Rda")

save(metro.tracts.1990.fin,
     file = "003_1990_trts.Rda")

save(metro.tracts.8090.fin,
     file = "003_8090_trts.Rda")


### END OF PROGRAM ###

sink()
sink(type = "message")
